Intro

Fit a density (also referred to as CPUE) model with environmental predictors, and use it to calculate the weighted mean dissolved oxygen, temperature and depth of Baltic cod.

# Load libraries, install if needed
library(tidyverse); theme_set(theme_light(base_size = 10))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork)
library(janitor)
library(icesDatras)
library(mapdata)
library(patchwork)
library(rgdal)
library(raster)
library(sf)
library(rgeos)
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(qwraps2)
#remotes::install_github("pbs-assess/sdmTMB")
library(sdmTMB)# To load entire cache in interactive r session, do: qwraps2::lazyload_cache_dir(path = "R/analysis/spatial_trend_models_cache/html")

For maps

# Specify the lon/lat extent of the study area
ymin <- 54
ymax <- 58
xmin <- 12
xmax <- 22

# Country polygons for Europe as an sf object
map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  returnclass = "sf", continent = "europe")

# Crop the polygon for plotting and efficiency
# (st_bbox(map_data) gives the rough coordinates)
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data,
          c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))

# Transform the map into UTM zone 33 (EPSG:32633), the equal-area
# projection the models are fitted in
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)

# Quick visual check of the projected coastline
ggplot(swe_coast_proj) + geom_sf()


# Define plotting theme for the main plots.
# Fix: base_size and base_family are now forwarded to theme_light();
# previously they were accepted but ignored (10 and "" were hard-coded),
# so calls like theme_plot(base_size = 14) silently had no effect.
theme_plot <- function(base_size = 10, base_family = "") {
  theme_light(base_size = base_size, base_family = base_family) +
    theme(
      axis.text.x = element_text(angle = 90),
      axis.text = element_text(size = 8),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 8),
      legend.position = "bottom",
      legend.key.height = unit(0.2, "cm"),
      legend.margin = margin(0, 0, 0, 0),
      legend.box.margin = margin(-5, -5, -5, -5),
      strip.text = element_text(size = 8, colour = 'black', margin = margin()),
      strip.background = element_rect(fill = "grey90")
      )
}

# Define plotting theme for facet_wrap maps with years.
# Fix: base_size and base_family are now forwarded to theme_light();
# previously they were ignored, so theme_facet_map(base_size = 14)
# (used for the base map below) had no effect.
theme_facet_map <- function(base_size = 10, base_family = "") {
  theme_light(base_size = base_size, base_family = base_family) +
    theme(
        axis.text.x = element_text(angle = 90),
        axis.text = element_text(size = 6),
        strip.text = element_text(size = 8, colour = 'black', margin = margin()),
        strip.background = element_rect(fill = "grey90")
      )
}

# Default base map: projected coastline with axis labels; reused as the
# starting layer for all map figures below
plot_map_raster <-
  ggplot(swe_coast_proj) +
  geom_sf(size = 0.3) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_facet_map(base_size = 14)

# Convert vectors of longitude (x) and latitude (y), assumed WGS84,
# to UTM coordinates (meters) in the given zone.
# Returns a data.frame with columns ID, X, Y.
# Fixes: seq_along() instead of 1:length(x) (safe for empty input), and
# the target proj string previously had " ellps=WGS84" without the "+"
# prefix, so the ellipsoid parameter was not valid PROJ syntax (see the
# "Discarded datum Unknown" warning in the knitted output).
LongLatToUTM <- function(x, y, zone){
  xy <- data.frame(ID = seq_along(x), X = x, Y = y)
  coordinates(xy) <- c("X", "Y")
  proj4string(xy) <- CRS("+proj=longlat +datum=WGS84")
  res <- spTransform(xy, CRS(paste0("+proj=utm +zone=", zone, " +ellps=WGS84")))
  as.data.frame(res)
}

Read data

# Haul-level CPUE data with environmental covariates
d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")

# Calculate standardized (z-scored) covariates.
# Scaling is done BEFORE drop_na(), as in the original pipeline, so the
# mean/sd use all non-missing values of each column (scale() is NA-tolerant).
# Changes: scale columns directly (the intermediate copy + superseded
# mutate_at() step is gone) and a stray trailing comma in mutate() removed.
d <- d %>%
  mutate(oxy_sc = as.vector(scale(oxy)),
         temp_sc = as.vector(scale(temp)),
         depth_sc = as.vector(scale(depth)),
         year = as.integer(year)) %>%
  drop_na(oxy, depth, temp) %>%
  rename(density_cod = density) # to fit better with how flounder is named

Read the prediction grids

# Prediction grid with oxygen, temperature and depth per cell and year
pred_grid2 <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/pred_grid2.csv")

# Standardize the grid covariates using the mean and sd of the survey data
# (d), so grid predictions are on the same covariate scale the models see
pred_grid2 <- pred_grid2 %>%
  mutate(year = as.integer(year)) %>%
  filter(year %in% unique(d$year)) %>%
  mutate(depth_sc = (depth - mean(d$depth)) / sd(d$depth),
         temp_sc = (temp - mean(d$temp)) / sd(d$temp),
         oxy_sc = (oxy - mean(d$oxy)) / sd(d$oxy)) %>%
  drop_na(oxy, depth, temp)

# Add the ICES rectangle code for each grid cell from its lon/lat
pred_grid2$ices_rect <- ices.rect2(pred_grid2$lon, pred_grid2$lat)

Make spde mesh

# Build the SPDE mesh for the density models: 100 kmeans-placed knots on the
# X/Y coordinates (UTM km, presumably — the map plots below use X * 1000);
# fixed seed so the mesh is reproducible
spde <- make_mesh(d, xy_cols = c("X", "Y"),
                  n_knots = 100, 
                  type = "kmeans", seed = 42)

# Plot and save spde
# (assumes "figures/supp/" exists relative to the working directory)
png(file = "figures/supp/density_spde.png", units = "in", width = 6.5, height = 6.5, res = 300)
plot(spde)
dev.off()

Fit the density models

Cod

With all covariates

# mcod1 <- sdmTMB(density_cod ~ 1 + s(depth_sc) + s(oxy_sc) + s(temp_sc),
#                 data = d, spde = spde, family = tweedie(link = "log"),
#                 fields = "AR1", include_spatial = TRUE, time = "year",
#                 spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
#                 control = sdmTMBcontrol(newton_steps = 1))
# 
# tidy(mcod1, conf.int = TRUE)
# 
# d$residualsmcod1 <- residuals(mcod1)
# qqnorm(d$residualsmcod1); abline(a = 0, b = 1)
# 
# # Check AR1 param
# mcodsd1 <- as.data.frame(summary(TMB::sdreport(mcod1$tmb_obj)))
# mcodsd1$Estimate[row.names(mcodsd1) == "ar1_phi"]
# mcodsd1$Estimate[row.names(mcodsd1) == "ar1_phi"] +
#   c(-2, 2) * mcodsd1$`Std. Error`[row.names(mcodsd1) == "ar1_phi"]

Only depth covariate

# Cod density with only the depth covariate: Tweedie(log) observation model,
# penalized smooth of scaled depth, a spatial field plus AR1 spatiotemporal
# fields, a spatial trend field (zeta_s, mapped below), fitted by ML.
# NOTE(review): spde=/fields=/include_spatial=/spatial_only=/spatial_trend=
# are arguments from an older sdmTMB API — confirm against the installed
# package version before re-running.
mcod2 <- sdmTMB(density_cod ~ 1 + s(depth_sc),
                data = d, spde = spde, family = tweedie(link = "log"),
                fields = "AR1", include_spatial = TRUE, time = "year",
                spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
                control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

# Coefficient table with confidence intervals
tidy(mcod2, conf.int = TRUE)

# Residuals (randomized-quantile, presumably — check residuals.sdmTMB)
# and a QQ-plot against the standard normal
d$residualsmcod2 <- residuals(mcod2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmcod2); abline(a = 0, b = 1)

No covariates

# mcod3 <- sdmTMB(density_cod ~ 1,
#                data = d, spde = spde, family = tweedie(link = "log"),
#                fields = "AR1", include_spatial = TRUE, time = "year",
#                spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
#                control = sdmTMBcontrol(newton_steps = 1))
# 
# tidy(mcod3, conf.int = TRUE)
# 
# d$residualsmcod3 <- residuals(mcod3)
# qqnorm(d$residualsmcod3); abline(a = 0, b = 1)

Flounder

With all covariates

# mfle1 <- sdmTMB(density_fle ~ 1 + s(depth_sc) + s(oxy_sc) + s(temp_sc),
#                 data = d, spde = spde, family = tweedie(link = "log"),
#                 fields = "AR1", include_spatial = TRUE, time = "year",
#                 spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
#                 control = sdmTMBcontrol(newton_steps = 1))
# 
# tidy(mfle1, conf.int = TRUE)
# 
# d$residualsmfle1 <- residuals(mfle1)
# qqnorm(d$residualsmfle1); abline(a = 0, b = 1)
# 
# # Check AR1 param
# mflesd1 <- as.data.frame(summary(TMB::sdreport(mfle1$tmb_obj)))
# mflesd1$Estimate[row.names(mflesd1) == "ar1_phi"]
# mflesd1$Estimate[row.names(mflesd1) == "ar1_phi"] +
#   c(-2, 2) * mflesd1$`Std. Error`[row.names(mflesd1) == "ar1_phi"]

Only depth covariate

# Flounder density, same structure as the cod model mcod2 above:
# Tweedie(log), penalized depth smooth, spatial + AR1 spatiotemporal fields,
# spatial trend, ML estimation.
# NOTE(review): same older sdmTMB API arguments as mcod2 — confirm against
# the installed package version.
mfle2 <- sdmTMB(density_fle ~ 1 + s(depth_sc),
                data = d, spde = spde, family = tweedie(link = "log"),
                fields = "AR1", include_spatial = TRUE, time = "year",
                spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
                control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

# Coefficient table with confidence intervals
tidy(mfle2, conf.int = TRUE)

# Residuals and QQ-plot against the standard normal
d$residualsmfle2 <- residuals(mfle2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
qqnorm(d$residualsmfle2); abline(a = 0, b = 1)

No covariates

# mfle3 <- sdmTMB(density_fle ~ 1,
#                data = d, spde = spde, family = tweedie(link = "log"),
#                fields = "AR1", include_spatial = TRUE, time = "year",
#                spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
#                control = sdmTMBcontrol(newton_steps = 1))
# 
# tidy(mfle3, conf.int = TRUE)
# 
# d$residualsmfle3 <- residuals(mfle3)
# qqnorm(d$residualsmfle3); abline(a = 0, b = 1)

Now fit a simple, spatial trend condition model with depth as a covariate

# Individual-level condition data (weight, length) for cod
dc <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cond.csv")
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   .default = col_double(),
#>   ices_rect = col_character(),
#>   IDr = col_character()
#> )
#> ℹ Use `spec()` for the full column specifications.

# Calculate standardized variables
# NOTE(review): despite its name, depth_sc here is just a COPY of raw depth
# (not scaled), and the centered version depth_sc_sc is computed but never
# used below — the condition model uses s(depth_sc). Confirm this is
# intentional (the _sc name may be kept only for symmetry with the density
# models / prediction grid).
dc <- dc %>% 
  mutate(year = as.integer(year),
         depth_sc = depth,
         ln_weight_g = log(weight_g),
         ln_length_cm = log(length_cm)) %>%
  mutate(depth_sc_sc = depth_sc - mean(depth_sc))
#> mutate: converted 'year' from double to integer (0 new NA)
#>         changed 94,533 values (100%) of 'depth_sc' (0 new NA)
#>         new variable 'ln_weight_g' (double) with 3,744 unique values and 0% NA
#>         new variable 'ln_length_cm' (double) with 110 unique values and 0% NA
#> mutate: new variable 'depth_sc_sc' (double) with 125 unique values and 0% NA

# Build a finer mesh (200 knots) for the condition data.
# NOTE: this overwrites the `spde` used for the density models above — fine
# here because those models are already fitted, but don't re-run the density
# chunks after this point without rebuilding their mesh.
spde <- make_mesh(dc, xy_cols = c("X", "Y"),
                  n_knots = 200, 
                  type = "kmeans", seed = 42)

# Plot and save spde
png(file = "figures/supp/condition_spde.png", units = "in", width = 6.5, height = 6.5, res = 300)
plot(spde)
dev.off()
#> quartz_off_screen 
#>                 2

# Fit the condition model: log weight ~ log length + smooth of depth,
# Student-t errors (df = 5, identity link) to dampen outlying weights,
# with the same spatial + AR1 + spatial-trend structure as the density models.
# Fix: `family =` is now named explicitly — previously student(...) was
# passed positionally between named arguments and only matched `family`
# by accident of the formal-argument order, which is fragile across
# package versions.
mcond <- sdmTMB(ln_weight_g ~ 1 + ln_length_cm + s(depth_sc),
                data = dc, spde = spde,
                family = student(link = "identity", df = 5),
                fields = "AR1", include_spatial = TRUE, time = "year",
                spatial_only = FALSE, spatial_trend = TRUE, reml = FALSE,
                control = sdmTMBcontrol(newton_steps = 1))

# Residuals (randomized-quantile, presumably — check residuals.sdmTMB)
dc$residuals_mcond <- residuals(mcond)

# QQ-plot against the standard normal, saved to file
png(file = "figures/supp/condition_qq.png", units = "in", width = 6.5, height = 6.5, res = 300)
qqnorm(dc$residuals_mcond); abline(a = 0, b = 1)
dev.off()

Predict density models on grid

# Predict the fitted density models on the prediction grid
# (one row per grid cell and year); `est` is on the log (link) scale
predcod <- predict(mcod2, newdata = pred_grid2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
predfle <- predict(mfle2, newdata = pred_grid2)
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

# Condition
# ln_length_cm = 0 corresponds to a 1 cm fish (log(1) = 0), i.e. the
# length-standardized condition level on the grid
predcondition <- predict(mcond, newdata = mutate(pred_grid2, ln_length_cm = 0))
#> mutate: new variable 'ln_length_cm' (double) with one unique value and 0% NA
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

Plot spatial trend

# Cod spatial trend (zeta_s); filtered to a single year for plotting
p1 <- plot_map_raster +
  geom_raster(data = filter(predcod, year == 1999), aes(x = X * 1000, y = Y * 1000, fill = zeta_s)) +
  scale_fill_gradient2() +
  ggtitle("Cod spatial trend") + 
  theme_plot()

# Fix: pass the plot explicitly instead of relying on ggplot2's implicit
# last_plot(), which is fragile if chunks are re-run out of order
ggsave("figures/zeta_s_cod_map.png", plot = p1, width = 6.5, height = 6.5, dpi = 600)

# Flounder spatial trend (zeta_s); filtered to a single year for plotting
p2 <- plot_map_raster +
  geom_raster(data = filter(predfle, year == 1999), aes(x = X * 1000, y = Y * 1000, fill = zeta_s)) +
  scale_fill_gradient2() +
  ggtitle("Flounder spatial trend") + 
  theme_plot()

# Fix: pass the plot explicitly instead of relying on implicit last_plot()
ggsave("figures/zeta_s_fle_map.png", plot = p2, width = 6.5, height = 6.5, dpi = 600)

# Cod condition spatial trend (zeta_s from the condition model)
p3 <- plot_map_raster +
  geom_raster(data = filter(predcondition, year == 1999),
              aes(x = X * 1000, y = Y * 1000, fill = zeta_s)) +
  scale_fill_gradient2() +
  ggtitle("Cod condition spatial trend") +
  theme_plot()

# Layout: cod and flounder trends on top, condition bottom-left,
# spacer bottom-right
(p1 | p2) / (p3 + plot_spacer())

Do the same but filter away the lowest predictions

# Cod
# NOTE(review): quantile(est, prob = 0.25) is evaluated on the FULL predcod
# column (all years), not just 1999 — filter() evaluates both sides of the
# `&` on the whole data. Confirm that an all-years threshold is intended.
# The warning below also suggests geom_tile() for unevenly spaced pixels.
plot_map_raster +
  geom_raster(data = filter(predcod, year == 1999 & est > quantile(est, prob = 0.25)), aes(x = X * 1000, y = Y * 1000, fill = zeta_s)) +
  scale_fill_gradient2() +
  ggtitle("Spatial trend effects") +
  theme_plot()
#> filter: removed 243,743 rows (97%), 6,844 rows remaining
#> Warning: Raster pixels are placed at uneven vertical intervals and will be
#> shifted. Consider using geom_tile() instead.


# Flounder
# NOTE(review): as for cod above, the quantile threshold is computed over
# all years of predfle, not only 1999 — confirm intended.
plot_map_raster +
  geom_raster(data = filter(predfle, year == 1999 & est > quantile(est, prob = 0.25)), aes(x = X * 1000, y = Y * 1000, fill = zeta_s)) +
  scale_fill_gradient2() +
  ggtitle("Spatial trend effects") +
  theme_plot()
#> filter: removed 244,347 rows (98%), 6,240 rows remaining


# Compare the two species' spatial-trend fields where both are reasonably
# abundant (above the 25th percentile of predicted density).
# NOTE(review): `all_pred` is not created anywhere in this file — presumably
# it joins predcod and predfle (cod_est/fle_est, cod_zeta_s/fle_zeta_s) in a
# cached or earlier chunk. Confirm before re-running from scratch.
all_pred %>%
  filter(cod_est > quantile(cod_est, prob = 0.25),
         fle_est > quantile(fle_est, prob = 0.25)) %>%
  ggplot(., aes(fle_zeta_s, cod_zeta_s)) +
  geom_point() + 
  geom_abline(color = "red")
#> filter: removed 93,130 rows (37%), 157,457 rows remaining

At which scale do we have negative correlations?

# All data: raw flounder vs cod density per haul
d %>%
  ggplot(aes(density_fle, density_cod)) +
  geom_point(alpha = 0.1)


# The above plot however contains rectangles where one species is not
# abundant but the other could be — hence some apparent negative
# co-occurrence.
# Sum densities across rectangles (the old comment said "Average", but
# sum() is what has always been computed here).
d_sum <- d %>% 
  group_by(ices_rect) %>% 
  summarise(rec_density_cod = sum(density_cod),
            rec_density_fle = sum(density_fle)) %>% 
  ungroup() 

# Add in the mid-coordinate of each ICES rectangle
# (single ices.rect() lookup instead of two identical calls)
rect_mid <- ices.rect(d_sum$ices_rect)
d_sum$lon <- rect_mid[, 1]
d_sum$lat <- rect_mid[, 2]

# ... and add the UTM (km) coordinates based on the lon/lat mid-points
utm_coords <- LongLatToUTM(d_sum$lon, d_sum$lat, zone = 33)
d_sum$X <- utm_coords$X / 1000
d_sum$Y <- utm_coords$Y / 1000

# Cod: summed density per rectangle (log scale), drawn as filled squares
p1 <- plot_map_raster +
  geom_point(data = d_sum,
             aes(x = X * 1000, y = Y * 1000, color = log(rec_density_cod)),
             shape = 15, size = 10) +
  scale_color_viridis() +
  theme_plot()

# Flounder: same map for flounder
p2 <- plot_map_raster +
  geom_point(data = d_sum,
             aes(x = X * 1000, y = Y * 1000, color = log(rec_density_fle)),
             shape = 15, size = 10) +
  scale_color_viridis() +
  theme_plot()

p1 / p2


# Now keep only rectangles above a density threshold for BOTH species.
# Fix: prob = 0.25 is the 25th percentile — the old comment said "10th
# percentile" and the columns were named percentile_10_*; names now match
# the code. (These helper columns are not used further downstream.)
d_filt <- d_sum %>% 
  mutate(percentile_25_cod = quantile(rec_density_cod, prob = 0.25),
         percentile_25_fle = quantile(rec_density_fle, prob = 0.25)) %>% 
  mutate(rec_keep_cod = ifelse(rec_density_cod > percentile_25_cod, "Y", "N"),
         rec_keep_fle = ifelse(rec_density_fle > percentile_25_fle, "Y", "N")) %>% 
  filter(rec_keep_cod == "Y" & rec_keep_fle == "Y")

# Plot again, now for the filtered rectangles only
# Cod
p3 <- plot_map_raster +
  geom_point(data = d_filt,
             aes(x = X * 1000, y = Y * 1000, color = log(rec_density_cod)),
             shape = 15, size = 10) +
  scale_color_viridis() +
  theme_plot()

# Flounder
p4 <- plot_map_raster +
  geom_point(data = d_filt,
             aes(x = X * 1000, y = Y * 1000, color = log(rec_density_fle)),
             shape = 15, size = 10) +
  scale_color_viridis() +
  theme_plot()

# Unfiltered rectangles (top row) vs filtered (bottom row)
(p1 | p2) / (p3 | p4)


# Same flounder-vs-cod density relationship, one panel per retained rectangle
d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  ggplot(aes(density_fle, density_cod)) +
  geom_point(alpha = 0.6) +
  facet_wrap(~ices_rect, scales = "free") +
  theme_facet_map() +
  coord_cartesian(expand = 0.02)


# Observed densities pooled across the retained rectangles, with a smoother
p5 <- d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  ggplot(aes(density_fle, density_cod)) +
  geom_point(alpha = 0.1) +
  stat_smooth() +
  theme_facet_map() +
  coord_cartesian(ylim = c(0, 4000), xlim = c(0, 2000))

# Same, but now use the predicted estimates
# Predictions back onto the observation locations; `est` is on the log
# (link) scale for the Tweedie models, hence exp() when plotted below
d$cod_pred <- predict(mcod2, newdata = d)$est
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.
d$fle_pred <- predict(mfle2, newdata = d)$est
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k`, specifing `k` now indicates
#> an upper limit on the basis functions, and the degree of wiggliness
#> is determined by the data.

# As p5, but using the model-predicted densities (back-transformed with exp)
p6 <- d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  ggplot(aes(exp(fle_pred), exp(cod_pred))) +
  geom_point(alpha = 0.1) +
  stat_smooth() +
  theme_facet_map() +
  coord_cartesian(ylim = c(0, 4000), xlim = c(0, 2000))

# Observed (p5) on top, predicted (p6) below
p5 / p6


# Predicted (back-transformed from the log link) vs observed density — cod
d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  ggplot(aes(density_cod, exp(cod_pred))) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "red") +
  theme_facet_map()

# Predicted vs observed density — flounder
d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  ggplot(aes(density_fle, exp(fle_pred))) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "red") +
  theme_facet_map()


# OK, now summarize this a bit: look at presence/absence per haul
pres_abs <- d %>%
  filter(ices_rect %in% d_filt$ices_rect) %>%
  mutate(cod_pa = ifelse(density_cod > 0, "Y", "N"),
         fle_pa = ifelse(density_fle > 0, "Y", "N"))

# Jittered co-occurrence of the two presence/absence categories
pres_abs %>%
  ggplot(aes(fle_pa, cod_pa)) +
  geom_jitter(alpha = 0.6)

# This is not an optimal plot though because most times you catch either one or the other...